* Housekeeping: switch off output paging and reset all timers
set more off
timer clear

* Working paths, all built from the project root stored in global base
global base "/directory"
global DoPth "$base/dofiles/"
global PthIn "$base/data/" 
global PthOut "$base/output/"

* Globals and locals
*	- starty / endy: first and last year of the sample window
*	- startyplus: second year of the window (lower bound of the append loops);
*	  computed from starty so that changing starty cannot leave it out of sync
global starty 1976
global endy 2011
global startyplus = $starty + 1

*************************
*** DATA CONSTRUCTION ***
*************************

******************************************************
*** CREATING LBD FIRM-LEVEL EMPLOYMENT MASTER FILE ***
******************************************************

* - Starting point: firm-level LBD files (2016 vintage)
* - Stack the yearly firm files for the whole sample window into one panel
* - The first year is read from the global starty (not a literal year) so that the
*   window is controlled in a single place, consistent with the loop bounds below
use "$PthIn/LBD/lbd_firm_${starty}.dta", clear
forvalues year = $startyplus(1)$endy  {
	
	append using "$PthIn/LBD/lbd_firm_`year'.dta"
	
}
* Panel order (firm, then year) is what the lag-based growth rates rely on
sort firmid year
save "$PthOut/firm_LBD_master2.dta", replace

***********************************
*** CREATION OF BASIC VARIABLES ***
***********************************

*** Firm-level growth measures
* Note: - Need to calculate year-to-year growth rates
*	- LBD-F is not necessarily balanced across and within firms
*		- A growth rate is only defined when the previous observation of the same firm
*		  is exactly one calendar year earlier; otherwise it is left missing
*		- Consequently the first observation of every firm has missing growth rates

*** Consider three types of growth rates
* - "growth": g(t) = (x(t) - x(t-1))/x(t-1)
* - "lngrowth": g(t) = ln(x(t)/x(t-1))
* - "DHSgrowth": g(t) = (x(t) - x(t-1))/(0.5*(x(t)+x(t-1)))

* Growth rate conditional on base year t-1
* Note: - Use this specification as baseline (for both "blocking" and "rolling" methodologies)
*	- The (year) in the by-prefix makes the within-firm ordering explicit, so the [_n-1] lag
*	  is the previous year's record even if the data in memory were re-sorted beforehand
bys firmid (year): gen emp_gr2 = (emp-emp[_n-1])/emp[_n-1] if year==year[_n-1]+1
bys firmid (year): gen emp_gr_ln2 = ln(emp/emp[_n-1]) if year==year[_n-1]+1
bys firmid (year): gen emp_gr_DHS2 = (emp-emp[_n-1])/(0.5*emp+0.5*emp[_n-1]) if year==year[_n-1]+1

* Denominator of the DHS growth rate (average employment over t-1 and t)
bys firmid (year): gen emp_DHS2 = 0.5*emp+0.5*emp[_n-1] if year==year[_n-1]+1

* Track number of observations within each firm's life cycle
*	- Running index within firm; its value on the last observation equals the firm's total number of observations
bys firmid (year): gen f_obs = _n

save "$PthOut/firm_LBD_master2.dta", replace

*** Determine location (county/state) and industry (3-digit NAICS) at the firm level in LBD
* For every sample year: reduce the establishment file to one "main" establishment per firm
* and store the result as a per-year temp file (lbd<year>_locind.dta) for later stacking

* Make sure the temp folder exists (the error raised when it already exists is ignored)
capture mkdir "$PthOut/temp"

forvalues year = $starty(1)$endy  {
	
	disp "Year is: `year'"
	
	use "$PthIn/LBD/lbd_estab_`year'.dta", clear

	* No use for observations with missing "firmid"
	drop if firmid==""
	
	* Merge Fort-Klimek time-consistent industry codes into LBD-E
	*	- Records that exist only in the NAICS file are discarded
	merge 1:1 lbdnum using "$PthIn/LBD/naics`year'.dta"
	drop if _merge==2
	drop _merge
	
	ren yr year
	keep lbdnum firmid year emp pay state county zip fk_naics12
	
	* Primary industry and location of firm is determined by:
	*	- Industry and location of largest (i.e., employment count) establishment within firm
	bys firmid: egen max_emp = max(emp)
	keep if emp==max_emp
	drop max_emp
	
	* Diagnostic: how many establishments per firm are tied at maximum employment
	bys firmid: gen d_firmid = _n
	tab d_firmid
	drop d_firmid
	
	* Largest establishment (in terms of employment count) within firms is not always unique across universe of firms
	*	- Require selection procedure (in specific order) on:
	*		- Payroll, county, NAICS
	
	* Identify largest establishments (i.e., payroll) among largest establishments (in terms of employment) within a firm
	bys firmid: egen max_pay = max(pay)
	keep if pay==max_pay
	drop max_pay

	* Identify "most frequent" county among largest establishments (in terms of employment) within a firm
	gen fipscou = state + county
	bys firmid fipscou: gen d_fc = _N
	bys firmid: egen d_max = max(d_fc)
	keep if d_fc==d_max
	drop d_fc d_max
	
	* Identify "most frequent" 3-digit NAICS among largest establishments (in terms of employment) within a firm
	gen fk_naics12_3 = substr(fk_naics12,1,3)
	bys firmid fk_naics12_3: gen d_fnaics = _N
	bys firmid: egen d_max = max(d_fnaics)
	keep if d_fnaics==d_max
	drop d_fnaics d_max
	
	* Drop "multiple" observations within a firm if industry and county information are both missing
	bys firmid: gen d_firm = _N
	gen d_fill = 0
	replace d_fill = 1 if fk_naics12_3=="" & fipscou==""
	drop if d_fill==1 & d_firm>1
	drop d_fill d_firm
	
	* Final tie-break after previous sequence
	* Note:	- This is a very small fraction of the total sample
	*	- Ties are broken on the smallest lbdnum so that the choice is reproducible across runs
	*	  (an unordered by-sort leaves the order of tied records arbitrary)
	bys firmid (lbdnum): keep if _n==1
	
	ren lbdnum lbdnum_mainestab
	
	* The file name must carry the loop index "year": the compile step reads one file per year
	save "$PthOut/temp/lbd`year'_locind.dta", replace

}

* Compile "location-industry" master file which includes location and industry information for all "firmid-year" combinations
*	- Start from the first sample year (global starty) and append the remaining years
*	- Each per-year temp file is erased as soon as it has been read
use "$PthOut/temp/lbd${starty}_locind.dta", clear
forvalues i=$startyplus(1)$endy  {

	append using "$PthOut/temp/lbd`i'_locind.dta"
	erase "$PthOut/temp/lbd`i'_locind.dta"
	
}
erase "$PthOut/temp/lbd${starty}_locind.dta"

sort firmid year
keep firmid year fipscou state fk_naics12 fk_naics12_3 lbdnum_mainestab
* String key (year followed by firm identifier) used for the 1:1 merge into the firm master file
gen merge_id = string(year) + firmid
save "$PthOut/temp/lbd_all_locind.dta", replace

* Attach the "location-industry" information to the LBD-F_EMP master file
*	- Key: year followed by firm identifier, built the same way on both sides
*	- Only master records are retained (matched or not); records found only in the
*	  location-industry file are discarded and no merge indicator is kept
use "$PthOut/firm_LBD_master2.dta", clear

generate merge_id = string(year) + firmid
merge 1:1 merge_id using "$PthOut/temp/lbd_all_locind.dta", keep(master match) nogenerate
drop merge_id

* The stacked temp file is no longer needed
erase "$PthOut/temp/lbd_all_locind.dta"

*** Create bins ("emp") for different size categories
* Coding of both bin variables: 0 = zero employment, 1 = first size class (1-4), ...,
* highest code = open-ended top class, missing = missing employment
* Note: - cut() assigns codes only to values in [first breakpoint, last breakpoint)
*	- The last breakpoint must therefore equal the lower bound of the open-ended top class;
*	  with 9999 (or 99999) a firm with exactly that employment falls in no class and ends up missing

* BDS
egen binBDS = cut(emp), at(1,5,10,20,50,100,250,500,1000,2500,5000,10000) icodes
replace binBDS = 11 if emp>=10000 & !missing(emp)
replace binBDS = binBDS+1
replace binBDS = 0 if emp==0
replace binBDS = . if missing(emp)
	
* "BDSE" (extended BDS) - allow for more detailed bins in the right tail of the size distribution
egen binBDSE = cut(emp), at(1,5,10,20,50,100,250,500,1000,2500,5000,10000,25000,50000,100000) icodes
replace binBDSE = 14 if emp>=100000 & !missing(emp)
replace binBDSE = binBDSE+1
replace binBDSE = 0 if emp==0
replace binBDSE = . if missing(emp)

sort firmid year

save "$PthOut/firm_LBD_master2.dta", replace

****************************
*** ROLLING VOLATILITIES ***
****************************

*** Growth rates are defined as natural log differences
use "$PthOut/firm_LBD_master2.dta", clear

* Total number of observations of the firm
bysort firmid: gen f_obs2 = _N

* rangestat (sd) emp_gr_ln2 , by(firmid) interval(year -4 5)
* Note: - The command "rangestat" would be the natural tool for rolling volatilities
*	- However, this package is not available on the Census server 
*	- Hence, the rolling window is coded "directly" (i.e., brute force) on observation positions

*** Observation count
* Window of an observation at position n within its firm: positions n-4 to n+5,
* truncated at the first and at the last observation of the firm
*	- count = number of positions inside the truncated window
*	- Only defined for firms with at least 6 observations (missing otherwise)
*	- Examples: last observation -> 5; first observation -> 6; interior observation of a long firm -> 10
bysort firmid: gen count = min(_N, _n+5) - max(1, _n-4) + 1 if f_obs2>=6

* Denominator (count minus one) used for the squared-deviation average
gen count1 = count-1 if count>1

*** Rolling volatilities
* Inputs (created above): f_obs2 (observations per firm), count (window size), count1 (count minus one)
* Outputs: rollmean, rollvol, stdev_gr6 ... stdev_gr10, stdev_gr10plus, stdev_gr
*
* Window of an observation at position n: positions n-4 to n+5, truncated at the firm's first/last observation
*	- Firms with fewer than 6 observations: standard deviation over the whole firm
*	- Window covers the whole firm (only possible with 6-10 observations): standard deviation over the whole firm
*	- Otherwise ("edge" observations, count < f_obs2): computed by hand from the window
*		- rollmean = window sum / count
*		- rollvol  = sum of squared deviations from rollmean / count1   (a VARIANCE)
*		- stdev_*  = sqrt(rollvol), so that every entry of stdev_* is a standard deviation;
*		  copying rollvol itself would mix variances with the egen sd() values
* NOTE(review): a missing growth rate anywhere inside the window (e.g. the firm's first observation, whose
*	growth rate is missing by construction) makes rollmean/rollvol missing, whereas egen sd() skips
*	missings - confirm that this is intended
local g emp_gr_ln2

* CASE OF N < 6
bys firmid: egen stdev_gr = sd(`g') if f_obs2<6

* Edge observations: window is a strict subset of the firm's observations
tempvar edge acc
gen byte `edge' = f_obs2>=6 & count<f_obs2
gen double `acc' = 0 if `edge'

* Window mean: add the terms from offset -4 to +5, skipping offsets that fall outside the firm
forvalues k = -4(1)5  {
	bys firmid: replace `acc' = `acc' + `g'[_n+(`k')] if `edge' & inrange(_n+(`k'),1,_N)
}
gen rollmean = `acc'/count if `edge'

* Window variance: same offsets, squared deviations from the window mean
replace `acc' = 0 if `edge'
forvalues k = -4(1)5  {
	bys firmid: replace `acc' = `acc' + (`g'[_n+(`k')]-rollmean)^2 if `edge' & inrange(_n+(`k'),1,_N)
}
gen rollvol = `acc'/count1 if `edge'

* CASES OF N = 6, ..., 10: whole-firm sd, overwritten at edge observations
forvalues n = 6(1)10  {
	bys firmid: egen stdev_gr`n' = sd(`g') if f_obs2==`n'
	replace stdev_gr`n' = sqrt(rollvol) if f_obs2==`n' & `edge'
}

* CASE OF N > 10: every observation is an edge observation
bys firmid: egen stdev_gr10plus = sd(`g') if f_obs2>10
replace stdev_gr10plus = sqrt(rollvol) if f_obs2>10

* Construct rolling volatility measure at the "firmid-year" level
forvalues i = 6(1)10  {
	replace stdev_gr = stdev_gr`i' if f_obs2==`i'
}
replace stdev_gr = stdev_gr10plus if f_obs2>10

* Helper variables must not end up in the saved file
drop `edge' `acc'

save "$PthOut/firm_LBD_master2_rollvol.dta", replace

*** Growth rates are defined as DHS arc elasticities
use "$PthOut/firm_LBD_master2.dta", clear

* Total number of observations of the firm
bysort firmid: gen f_obs2 = _N

* CASE OF N < 6
*	- Short firms: standard deviation of the DHS growth rate over the whole firm
bysort firmid: egen stdev_gr = sd(emp_gr_DHS2) if f_obs2<6

* CASES OF N >= 6: observation count of the rolling window
* Window of an observation at position n within its firm: positions n-4 to n+5,
* truncated at the first and at the last observation of the firm
*	- count = number of positions inside the truncated window (missing for firms with fewer than 6 observations)
*	- Examples: last observation -> 5; first observation -> 6; interior observation of a long firm -> 10
bysort firmid: gen count = min(_N, _n+5) - max(1, _n-4) + 1 if f_obs2>=6

* Denominator (count minus one) used for the squared-deviation average
gen count1 = count-1 if count>1

* CASES OF N = 6, 7, 8, 9
* Inputs (created above): f_obs2 (observations per firm), count (window size), count1 (count minus one)
* Outputs: rollmean, rollvol (both filled for firms with 6-9 observations only), stdev_gr6 ... stdev_gr9
*
* Window of an observation at position n: positions n-4 to n+5, truncated at the firm's first/last observation
*	- Window covers the whole firm: standard deviation of the DHS growth rate over the whole firm
*	- Otherwise ("edge" observations, count < f_obs2): computed by hand from the window
*		- rollmean = window sum / count
*		- rollvol  = sum of squared deviations from rollmean / count1   (a VARIANCE)
*		- stdev_*  = sqrt(rollvol), so that every entry of stdev_* is a standard deviation
*	- Every term is the DHS growth rate; the log growth rate must not enter the DHS window mean
* NOTE(review): any remaining case that copies rollvol into a stdev_* variable needs the same sqrt()
* NOTE(review): a missing growth rate anywhere inside the window (e.g. the firm's first observation, whose
*	growth rate is missing by construction) makes rollmean/rollvol missing, whereas egen sd() skips
*	missings - confirm that this is intended
local g emp_gr_DHS2

* Edge observations among firms with 6-9 observations
tempvar edge acc
gen byte `edge' = inrange(f_obs2,6,9) & count<f_obs2
gen double `acc' = 0 if `edge'

* Window mean: add the terms from offset -4 to +5, skipping offsets that fall outside the firm
forvalues k = -4(1)5  {
	bys firmid: replace `acc' = `acc' + `g'[_n+(`k')] if `edge' & inrange(_n+(`k'),1,_N)
}
gen rollmean = `acc'/count if `edge'

* Window variance: same offsets, squared deviations from the window mean
replace `acc' = 0 if `edge'
forvalues k = -4(1)5  {
	bys firmid: replace `acc' = `acc' + (`g'[_n+(`k')]-rollmean)^2 if `edge' & inrange(_n+(`k'),1,_N)
}
gen rollvol = `acc'/count1 if `edge'

* Whole-firm sd, overwritten at edge observations
forvalues n = 6(1)9  {
	bys firmid: egen stdev_gr`n' = sd(`g') if f_obs2==`n'
	replace stdev_gr`n' = sqrt(rollvol) if f_obs2==`n' & `edge'
}

drop `edge' `acc'

* CASE OF N = 10
* Starting value: standard deviation of emp_gr_DHS2 over all rows of the firm.
* Rows of these firms that are not selected in the table below keep this value.
bys firmid: egen stdev_gr10 = sd(emp_gr_DHS2) if f_obs2==10

* Table of recomputed rows, one column per row: the within-firm row selector, the number
* of lags and the number of leads of emp_gr_DHS2 entering that row's mean and volatility.
* The window is at most 4 lags and 5 leads; row 5 is absent from the table.
* NOTE(review): count and count1 are defined earlier in the file; presumably the number of
*               terms in the window and that number minus one - confirm.
* NOTE(review): rollvol has no square root, so the recomputed rows hold a variance-type value
*               while row 5 keeps the egen standard deviation - confirm the intended unit.
local rowsel "1 2 3 4 6 7 8 9 _N"
local nlags  "0 1 2 3 4 4 4 4 4"
local nleads "5 5 5 5 4 3 2 1 0"

local touched
local bar
local nspec : word count `rowsel'
forvalues j = 1/`nspec' {
	local sel  : word `j' of `rowsel'
	local lag  : word `j' of `nlags'
	local lead : word `j' of `nleads'
	local from = -`lag'

	* Assemble the sum and the sum of squared deviations, earliest row first
	local total
	local sqdev
	local glue
	forvalues k = `from'/`lead' {
		local pos "_n"
		if `k' < 0 local pos "_n`k'"
		if `k' > 0 local pos "_n+`k'"
		local total "`total'`glue'emp_gr_DHS2[`pos']"
		local sqdev "`sqdev'`glue'(emp_gr_DHS2[`pos']-rollmean)^2"
		local glue "+"
	}

	* The mean must be in place before the deviations from it are taken
	bys firmid: replace rollmean = (`total')/count if f_obs2==10 & _n==`sel'
	bys firmid: replace rollvol = (`sqdev')/count1 if f_obs2==10 & _n==`sel'

	* Remember the row so that the final overwrite hits exactly the recomputed rows
	local touched "`touched'`bar'_n==`sel'"
	local bar " | "
}

bys firmid: replace stdev_gr10 = rollvol if f_obs2==10 & (`touched')

* CASE OF N > 10
* Creates stdev_gr10plus; every row with f_obs2>10 is overwritten by rollvol at the end of
* this section, so the egen value itself does not survive.
bys firmid: egen stdev_gr10plus = sd(emp_gr_DHS2) if f_obs2>10

* Table of row groups, one column per group: the row condition, the number of lags and the
* number of leads of emp_gr_DHS2 entering the mean and volatility of those rows.
* First four rows: all available lags and 5 leads. Last five rows: 4 lags and all
* available leads. Interior rows (last column): the full window of 4 lags and 5 leads.
* NOTE(review): count and count1 are defined earlier in the file; presumably the number of
*               terms in the window and that number minus one - confirm.
* NOTE(review): rollvol has no square root, so stdev_gr10plus ends up holding a variance-type
*               value despite its name - confirm the intended unit.
local rowsel "_n==1 _n==2 _n==3 _n==4 _n==_N-4 _n==_N-3 _n==_N-2 _n==_N-1 _n==_N _n>4&_n<_N-4"
local nlags  "0 1 2 3 4 4 4 4 4 4"
local nleads "5 5 5 5 4 3 2 1 0 5"

local nspec : word count `rowsel'
forvalues j = 1/`nspec' {
	local sel  : word `j' of `rowsel'
	local lag  : word `j' of `nlags'
	local lead : word `j' of `nleads'
	local from = -`lag'

	* Assemble the sum and the sum of squared deviations, earliest row first
	local total
	local sqdev
	local glue
	forvalues k = `from'/`lead' {
		local pos "_n"
		if `k' < 0 local pos "_n`k'"
		if `k' > 0 local pos "_n+`k'"
		local total "`total'`glue'emp_gr_DHS2[`pos']"
		local sqdev "`sqdev'`glue'(emp_gr_DHS2[`pos']-rollmean)^2"
		local glue "+"
	}

	* The mean must be in place before the deviations from it are taken
	bys firmid: replace rollmean = (`total')/count if f_obs2>10 & `sel'
	bys firmid: replace rollvol = (`sqdev')/count1 if f_obs2>10 & `sel'
}

bys firmid: replace stdev_gr10plus = rollvol if f_obs2>10

* Combine the case-specific measures into the single firmid-year variable stdev_gr
* (stdev_gr itself is created earlier in the file; rows with f_obs2 below 6 are left as they are)
foreach n of numlist 6/10 {
	replace stdev_gr = stdev_gr`n' if f_obs2==`n'
}
replace stdev_gr = stdev_gr10plus if f_obs2>10

* Store the firm-year panel including the rolling volatility measure
save "$PthOut/firm_LBD_master2_rollvol_DHS.dta", replace
